Confirmatory Factor Analysis and Structural Equation Models

Work in Progress

cfa
sem
measurement
# Load modelling, tabulation and plotting dependencies.
library(lavaan)          # cfa() / sem() estimation
library(dplyr)           # data manipulation (mutate, pipes)
library(reticulate)      # python engine for knitr chunks
library(marginaleffects) # predictions() / plot_predictions()
library(modelsummary)    # modelsummary() / modelplot() / datasummary_skim()
library(ggplot2)         # plotting
library(egg)             # ggarrange() for composing plots
library(lme4)            # lmer() hierarchical models
library(semPlot)         # semPaths() path diagrams
library(tinytable)       # style_tt() table styling
library(kableExtra)      # kable() tables
library(reshape2)        # melt() for heatmap data

# NOTE(review): machine-specific python path — will not resolve on other
# machines; confirm this is intentional for a personal notebook.
options(rstudio.python.installationPath = "/Users/nathanielforde/mambaforge/envs")
options("modelsummary_factory_default" = "tinytable")
options(repr.plot.width=15, repr.plot.height=8)

# Register the python chunk engine for knitr.
knitr::knit_engines$set(python = reticulate::eng_python)
options(scipen=999)  # suppress scientific notation in printed output
set.seed(130)        # fix the RNG so the injected noise below is reproducible
Warning

This is a work in progress. We intend to elaborate the choices related to analysing and understanding the data elicited through intentional psychometric surveys.

Measurement and Measurement Constructs

# Read the survey data and deliberately perturb one life-satisfaction
# item (ls_p3) for the west region, so the measurement structure differs
# across regions downstream.
df <- read.csv('sem_data.csv')

is_west <- df$region == 'west'
# One Gaussian draw (mean 0.5, sd 1) per western respondent, added to ls_p3.
df$ls_p3[is_west] <- df$ls_p3[is_west] + rnorm(sum(is_west), 0.5, 1)

# Two aggregate outcome scores over the three life-satisfaction items.
ls_items <- c('ls_p1', 'ls_p2', 'ls_p3')
df$ls_sum <- rowSums(df[ls_items])
df$ls_mean <- rowMeans(df[ls_items])

head(df) |> kable()
ID region gender age se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p2 sup_parents_p3 ls_p1 ls_p2 ls_p3 ls_sum ls_mean
1 west female 13 4.857143 5.571429 4.500000 5.80 5.500000 5.40 6.5 6.5 7.0 7.0 7.0 6.0 5.333333 6.75 7.647112 19.73044 6.576815
2 west male 14 4.571429 4.285714 4.666667 5.00 5.500000 4.80 4.5 4.5 5.5 5.0 6.0 4.5 4.333333 5.00 4.469623 13.80296 4.600985
10 west female 14 4.142857 6.142857 5.333333 5.20 4.666667 6.00 4.0 4.5 3.5 7.0 7.0 6.5 6.333333 5.50 4.710020 16.54335 5.514451
11 west female 14 5.000000 5.428571 4.833333 6.40 5.833333 6.40 7.0 7.0 7.0 7.0 7.0 7.0 4.333333 6.50 5.636198 16.46953 5.489844
12 west female 14 5.166667 5.600000 4.800000 5.25 5.400000 5.25 7.0 7.0 7.0 6.5 6.5 7.0 5.666667 6.00 5.266592 16.93326 5.644419
14 west male 14 4.857143 4.857143 4.166667 5.20 5.000000 4.20 5.5 6.5 7.0 6.5 6.5 6.5 5.000000 5.50 5.913709 16.41371 5.471236

Candidate Structure

#| code-fold: true
#| code-summary: "Show the code"

# Summary table of all variables, with rows colour-coded by variable
# group (drivers vs life-satisfaction items vs aggregate scores).
# NOTE(review): the row indices (2:14, 15:17, 18:19) are tied to the row
# order datasummary_skim() happens to produce — confirm they still line
# up if columns are added, removed or reordered.
datasummary_skim(df)|> 
 style_tt(
   i = 15:17,
   j = 1:1,
   background = "#20AACC",
   color = "white",
   italic = TRUE) |> 
 style_tt(
   i = 18:19,
   j = 1:1,
   background = "#2888A0",
   color = "white",
   italic = TRUE) |> 
 style_tt(
   i = 2:14,
   j = 1:1,
   background = "#17C2AD",
   color = "white",
   italic = TRUE)
tinytable_ftyu5q15n5ibjk84ch05
Unique Missing Pct. Mean SD Min Median Max Histogram
ID 283 0 187.9 106.3 1.0 201.0 367.0
age 5 0 14.7 0.8 13.0 15.0 17.0
se_acad_p1 32 0 5.2 0.8 3.1 5.1 7.0
se_acad_p2 36 0 5.3 0.7 3.1 5.4 7.0
se_acad_p3 29 0 5.2 0.8 2.8 5.2 7.0
se_social_p1 24 0 5.3 0.8 1.8 5.4 7.0
se_social_p2 27 0 5.5 0.7 2.7 5.5 7.0
se_social_p3 31 0 5.4 0.8 3.0 5.5 7.0
sup_friends_p1 13 0 5.8 1.1 1.0 6.0 7.0
sup_friends_p2 10 0 6.0 0.9 2.5 6.0 7.0
sup_friends_p3 13 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p1 11 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p2 11 0 5.9 1.1 2.0 6.0 7.0
sup_parents_p3 13 0 5.7 1.1 1.0 6.0 7.0
ls_p1 15 0 5.2 0.9 2.0 5.3 7.0
ls_p2 21 0 5.8 0.7 2.5 5.8 7.0
ls_p3 161 0 5.5 1.1 1.7 5.6 9.6
ls_sum 218 0 16.5 2.2 8.2 16.7 21.0
ls_mean 217 0 5.5 0.7 2.7 5.6 7.0
N %
region east 142 50.2
west 141 49.8
gender female 132 46.6
male 151 53.4
# All observed indicator columns: three parcels for each of the four
# driver constructs plus the three life-satisfaction items, in
# construct-major order (p1, p2, p3 within each construct).
drivers <- as.vector(t(outer(
  c('se_acad', 'se_social', 'sup_friends', 'sup_parents', 'ls'),
  paste0('_p', 1:3),
  paste0
)))



# Draw an annotated heatmap of a square matrix (covariances or
# correlations). The input is coerced to a matrix, melted to long form,
# and cell values are rounded to 2dp for the text labels.
plot_heatmap <- function(df, title = "Sample Covariances",
                         subtitle = "Observed Measures") {
  long_df <- melt(as.matrix(df))
  names(long_df) <- c("x", "y", "value")
  long_df$value <- round(long_df$value, 2)
  ggplot(long_df, aes(x = x, y = y, fill = value)) +
    geom_tile() +
    geom_text(aes(label = value), color = "black", size = 4) +
    scale_fill_gradient2(
      high = 'dodgerblue4',
      mid = 'white',
      low = 'firebrick2'
    ) +
    theme(axis.text.x = element_text(angle = 45)) +
    ggtitle(title, subtitle)
}

# Observed covariance and correlation heatmaps across all indicators.
g1 = plot_heatmap(cov(df[,  drivers]))

g2 = plot_heatmap(cor(df[,  drivers]), "Sample Correlations")

# Stack the two heatmaps vertically (ggarrange draws as a side effect).
plot <- ggarrange(g1,g2, ncol=1, nrow=2);

Fit Initial Regression Models

# Nested regression specifications: first parcel only, then parcels 1-2,
# then all three parcels per construct — for both the sum-score and the
# mean-score outcome.
formula_sum_1st = " ls_sum ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"
formula_mean_1st = " ls_mean ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"

formula_sum_12 = " ls_sum ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"
formula_mean_12 = " ls_mean ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"


# Full specifications with all twelve driver parcels.
formula_sum = " ls_sum ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
formula_mean = " ls_mean ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
# Fit all six OLS models on the raw data.
mod_sum = lm(formula_sum, df)
mod_sum_1st = lm(formula_sum_1st, df)
mod_sum_12 = lm(formula_sum_12, df)
mod_mean = lm(formula_mean, df)
mod_mean_1st = lm(formula_mean_1st, df)
mod_mean_12 = lm(formula_mean_12, df)

# Linearly rescale a numeric vector to the [0, 1] interval.
#
# x     : numeric vector to rescale.
# na.rm : remove NAs before computing the range? Defaults to FALSE,
#         matching the original behavior (any NA makes the whole
#         result NA).
#
# Returns x rescaled so min(x) -> 0 and max(x) -> 1. If x has zero
# range (all values equal) the result is NaN from division by zero,
# as in the original implementation.
min_max_norm <- function(x, na.rm = FALSE) {
    rng <- range(x, na.rm = na.rm)
    (x - rng[1]) / (rng[2] - rng[1])
}

# Min-max normalise the fifteen indicator columns (columns 5-19), carry
# the untransformed outcome scores across, then refit the full models on
# the normalised predictors.
df_norm <- df[5:19] |>
  lapply(min_max_norm) |>
  as.data.frame()

df_norm[c("ls_sum", "ls_mean")] <- df[c("ls_sum", "ls_mean")]

mod_sum_norm <- lm(formula_sum, df_norm)
mod_mean_norm <- lm(formula_mean, df_norm)

# Model registry grouped by outcome; the group names become column
# spanners in the modelsummary table below.
models <- list(
  "Outcome: sum_score" = list(
    "model_sum_1st_factors"     = mod_sum_1st,
    "model_sum_1st_2nd_factors" = mod_sum_12,
    "model_sum_score"           = mod_sum,
    "model_sum_score_norm"      = mod_sum_norm
  ),
  "Outcome: mean_score" = list(
    "model_mean_1st_factors"     = mod_mean_1st,
    "model_mean_1st_2nd_factors" = mod_mean_12,
    "model_mean_score"           = mod_mean,
    "model_mean_score_norm"      = mod_mean_norm
  )
)
#| code-fold: true
#| code-summary: "Show the code"

# Grouped regression table; shape="cbind" places the two outcome groups
# side by side. Rows 2:25 (the coefficient/SE rows) are highlighted.
# NOTE(review): the 2:25 index is tied to the rendered row order —
# confirm it still covers the coefficient rows if models change.
modelsummary(models, stars=TRUE, shape ="cbind") |> 
 style_tt(
   i = 2:25,
   j = 1:1,
   background = "#17C2AD",
   color = "white",
   italic = TRUE)
tinytable_jb9uvnwer8qg2neciem3
Outcome: sum_score Outcome: mean_score
model_sum_1st_factors model_sum_1st_2nd_factors model_sum_score model_sum_score_norm model_mean_1st_factors model_mean_1st_2nd_factors model_mean_score model_mean_score_norm
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 6.335*** 4.038*** 3.709*** 9.081*** 2.112*** 1.346*** 1.236*** 3.027***
(0.979) (1.080) (1.071) (0.739) (0.326) (0.360) (0.357) (0.246)
se_acad_p1 0.231 -0.023 -0.068 -0.264 0.077 -0.008 -0.023 -0.088
(0.158) (0.197) (0.216) (0.833) (0.053) (0.066) (0.072) (0.278)
se_social_p1 1.039*** 0.515* 0.401+ 2.085+ 0.346*** 0.172* 0.134+ 0.695+
(0.175) (0.224) (0.224) (1.167) (0.058) (0.075) (0.075) (0.389)
sup_friends_p1 0.069 -0.263+ -0.273 -1.636 0.023 -0.088+ -0.091 -0.545
(0.095) (0.145) (0.169) (1.011) (0.032) (0.048) (0.056) (0.337)
sup_parents_p1 0.518*** 0.196 0.050 0.299 0.173*** 0.065 0.017 0.100
(0.107) (0.155) (0.161) (0.964) (0.036) (0.052) (0.054) (0.321)
se_acad_p2 0.415+ 0.382+ 1.475+ 0.138+ 0.127+ 0.492+
(0.216) (0.227) (0.875) (0.072) (0.076) (0.292)
se_social_p2 0.662** 0.483+ 2.093+ 0.221** 0.161+ 0.698+
(0.233) (0.246) (1.065) (0.078) (0.082) (0.355)
sup_friends_p2 0.358* 0.366* 1.647* 0.119* 0.122* 0.549*
(0.172) (0.180) (0.808) (0.057) (0.060) (0.269)
sup_parents_p2 0.375* 0.166 0.829 0.125* 0.055 0.276
(0.151) (0.162) (0.808) (0.050) (0.054) (0.269)
se_acad_p3 -0.072 -0.302 -0.024 -0.101
(0.196) (0.815) (0.065) (0.272)
se_social_p3 0.350+ 1.400+ 0.117+ 0.467+
(0.180) (0.721) (0.060) (0.240)
sup_friends_p3 0.056 0.334 0.019 0.111
(0.146) (0.878) (0.049) (0.293)
sup_parents_p3 0.452** 2.710** 0.151** 0.903**
(0.141) (0.847) (0.047) (0.282)
Num.Obs. 283 283 283 283 283 283 283 283
R2 0.332 0.389 0.420 0.420 0.332 0.389 0.420 0.420
R2 Adj. 0.323 0.371 0.394 0.394 0.323 0.371 0.394 0.394
AIC 1134.2 1117.0 1110.3 1110.3 512.4 495.2 488.5 488.5
BIC 1156.1 1153.5 1161.3 1161.3 534.2 531.7 539.5 539.5
Log.Lik. -561.090 -548.507 -541.139 -541.139 -250.183 -237.600 -230.232 -230.232
RMSE 1.76 1.68 1.64 1.64 0.59 0.56 0.55 0.55
# Flat list of all eight fitted models for a single coefficient plot.
# Note: this rebinds `models`, replacing the nested list used above.
models = list(
     "model_sum_1st_factors" = mod_sum_1st,
     "model_sum_1st_2nd_factors" = mod_sum_12,
     "model_sum_score" = mod_sum,
     "model_sum_score_norm" = mod_sum_norm,
     "model_mean_1st_factors" = mod_mean_1st,
     "model_mean_1st_2nd_factors" = mod_mean_12,
     "model_mean_score"= mod_mean,
     "model_mean_score_norm" = mod_mean_norm
    )

# Coefficient estimates across all models with a dotted zero reference
# line; intercepts omitted so the slopes share a readable scale.
modelplot(models, coef_omit = 'Intercept') + geom_vline(xintercept = 0, linetype="dotted", 
                color = "black") + ggtitle("Comparing Model Parameter Estimates", "Across Covariates")

Significant Coefficients

# Recolour coefficient estimates by significance at the 0.001 level.
# NOTE(review): scale_color_manual assigns c("grey", "black") to the
# ifelse labels in alphabetical order, i.e. "Not significant" -> grey
# and "Significant" -> black — confirm this is the intended mapping.
g1 = modelplot(mod_sum, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"),  guide = "none")


g2 = modelplot(mod_mean, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))

# Sum-score and mean-score plots side by side; only g2 keeps the legend.
plot <- ggarrange(g1,g2, ncol=2, nrow=1);

Aggregate Driver Scores

# Parcel (mean) scores: average the three items of each driver construct
# into a single aggregate column per construct.
df$se_acad_mean <- rowMeans(df[c('se_acad_p1', 'se_acad_p2', 'se_acad_p3')])
df$se_social_mean <- rowMeans(df[c('se_social_p1', 'se_social_p2', 'se_social_p3')])
df$sup_friends_mean <- rowMeans(df[c('sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3')])
df$sup_parents_mean <- rowMeans(df[c('sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3')])


# Regressions of each outcome on the four construct-level parcel means.
formula_parcel_mean = "ls_mean ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"

formula_parcel_sum = "ls_sum ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"

mod_sum_parcel = lm(formula_parcel_sum, df)
mod_mean_parcel = lm(formula_parcel_mean, df)

models_parcel = list("model_sum_score" = mod_sum_parcel,
     "model_mean_score"= mod_mean_parcel
     )

# Side-by-side table of the two parcel models with significance stars.
modelsummary(models_parcel, stars=TRUE)
tinytable_or59mjdhizio3l7k66t7
model_sum_score model_mean_score
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 4.378*** 1.459***
(1.036) (0.345)
se_acad_mean 0.252 0.084
(0.176) (0.059)
se_social_mean 1.205*** 0.402***
(0.195) (0.065)
sup_friends_mean 0.049 0.016
(0.108) (0.036)
sup_parents_mean 0.685*** 0.228***
(0.111) (0.037)
Num.Obs. 283 283
R2 0.397 0.397
R2 Adj. 0.388 0.388
AIC 1105.3 483.5
BIC 1127.1 505.3
Log.Lik. -546.638 -235.731
RMSE 1.67 0.56
# Coefficient plots for the parcel models, recoloured by significance at
# the 0.001 level (grey = not significant, black = significant, per the
# alphabetical ordering of the ifelse labels).
g1 = modelplot(mod_sum_parcel, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"), guide = "none")


g2 = modelplot(mod_mean_parcel, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1,g2, ncol=2, nrow=1);

Hierarchical Models

# Random-intercept models: a per-region intercept offset captures
# regional baseline differences alongside the twelve fixed effects.
formula_hierarchy_mean = "ls_mean ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3  + (1 | region)"

formula_hierarchy_sum = "ls_sum ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"

# REML fits for both outcome codings.
hierarchical_mean_fit <- lmer(formula_hierarchy_mean, data = df, REML = TRUE)
hierarchical_sum_fit <- lmer(formula_hierarchy_sum, data = df, REML = TRUE)

# Coefficient plots; re.form=NA is forwarded so random effects are
# excluded. NOTE(review): the p.value-based recolouring presumably has
# missing values for the random-effect SD rows — likely the source of
# the "Removed 2 rows" warnings below; confirm.
g1 = modelplot(hierarchical_mean_fit, re.form=NA) +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"),  guide = "none")

g2 = modelplot(hierarchical_sum_fit, re.form=NA) +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))


plot <- ggarrange(g1,g2, ncol=2, nrow=1);
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

# Table of both hierarchical fits; rows 2:25 (coefficients, SEs and the
# random-effect SDs) are highlighted to match the earlier tables.
modelsummary(list("hierarchical_mean_fit"= hierarchical_mean_fit,
                  "hierarchical_sum_fit"= hierarchical_sum_fit), 
             stars = TRUE) |> 
 style_tt(
   i = 2:25,
   j = 1:1,
   background = "#17C2AD",
   color = "white",
   italic = TRUE)
tinytable_ftl5t6kstw3adyosicbz
hierarchical_mean_fit hierarchical_sum_fit
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 0.903* 2.710*
(0.385) (1.154)
sup_parents_p1 -0.007 -0.021
(0.053) (0.160)
sup_parents_p2 0.070 0.210
(0.053) (0.159)
sup_parents_p3 0.160*** 0.480***
(0.046) (0.139)
sup_friends_p1 -0.091+ -0.274+
(0.055) (0.166)
sup_friends_p2 0.125* 0.376*
(0.059) (0.177)
sup_friends_p3 0.024 0.072
(0.048) (0.144)
se_acad_p1 -0.041 -0.122
(0.071) (0.213)
se_acad_p2 0.131+ 0.393+
(0.074) (0.223)
se_acad_p3 0.039 0.116
(0.067) (0.202)
se_social_p1 0.094 0.283
(0.075) (0.224)
se_social_p2 0.191* 0.574*
(0.081) (0.244)
se_social_p3 0.130* 0.389*
(0.059) (0.178)
SD (Intercept region) 0.160 0.481
SD (Observations) 0.549 1.648
Num.Obs. 283 283
R2 Marg. 0.424 0.424
R2 Cond. 0.469 0.469
AIC 538.4 1131.6
BIC 593.1 1186.3
ICC 0.1 0.1
RMSE 0.54 1.61

Marginal Effects

# Counterfactual predictions over covariate grids; other covariates are
# held at datagrid() defaults. re.form=NA excludes random effects from
# the hierarchical predictions.
# NOTE(review): `pred` is computed but never displayed below — confirm
# it is intentional to show only the OLS grid (`pred1`).
pred <- predictions(hierarchical_mean_fit, newdata = datagrid(sup_parents_p3 = 1:10, region=c('east', 'west')), re.form=NA)
pred1 <- predictions(mod_mean, newdata = datagrid(sup_parents_p2 = 1:10))

pred1 |> head() |> kableExtra::kable()
rowid estimate std.error statistic p.value s.value conf.low conf.high se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p3 sup_parents_p2 ls_mean
1 5.232276 0.2673967 19.56747 0 280.8136 4.708188 5.756363 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 1 6.576815
2 5.287553 0.2140733 24.69973 0 445.0319 4.867977 5.707128 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 2 6.576815
3 5.342829 0.1610972 33.16525 0 798.8130 5.027085 5.658574 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 3 6.576815
4 5.398106 0.1089765 49.53461 0 Inf 5.184516 5.611696 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 4 6.576815
5 5.453383 0.0599835 90.91472 0 Inf 5.335818 5.570949 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 5 6.576815
6 5.508660 0.0334480 164.69327 0 Inf 5.443103 5.574217 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 6 6.576815

Regression Marginal Effects

# Predicted outcome as each focal covariate varies, holding the rest
# fixed (marginaleffects default grid), for the full OLS mean model.
g = plot_predictions(mod_mean, condition = c("sup_parents_p3"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 = plot_predictions(mod_mean, condition = c("sup_friends_p1"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 = plot_predictions(mod_mean, condition = c("se_acad_p1"), 
                      type = "response") + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

# Stack the three counterfactual plots vertically.
plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);

Hierarchical Marginal Effects

# Same counterfactual plots for the hierarchical model, split by region;
# re.form=NA excludes the random intercepts from the predictions.
g = plot_predictions(hierarchical_mean_fit, condition = c("sup_parents_p3", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 = plot_predictions(hierarchical_mean_fit, condition = c("sup_friends_p1", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 = plot_predictions(hierarchical_mean_fit, condition = c("se_acad_p1", "region"), 
                      type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);

Confirmatory Factor Analysis

# Pure measurement (CFA) model: five latent factors with three observed
# indicators each. lavaan fixes each factor's first loading to 1 by
# default to set the latent scale.
model_measurement <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3
"

# Constrained variant: the labels a1-a3 on the SUP_Friends loadings plus
# the two equality constraints force all three friend-support loadings
# to be equal (an essentially tau-equivalent friends factor), removing
# two free parameters relative to the full model.
model_measurement1 <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ a1*sup_friends_p1 + a2*sup_friends_p2 + a3*sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3

a1 == a2 
a1 == a3
"


# Fit both CFA specifications with lavaan's defaults (ML estimation).
fit_mod <- cfa(model_measurement, data = df)
fit_mod_1<- cfa(model_measurement1, data = df)


# Compare the two fits' parameter estimates in one coefficient plot.
cfa_models = list("full_measurement_model" = fit_mod, 
     "measurement_model_reduced" = fit_mod_1)
modelplot(cfa_models)

# Full lavaan summary: global fit indices plus standardized loadings.
summary(fit_mod, fit.measures = TRUE, standardized = TRUE) 
lavaan 0.6-18 ended normally after 46 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        40

  Number of observations                           283

Model Test User Model:
                                                      
  Test statistic                               207.137
  Degrees of freedom                                80
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              2586.651
  Degrees of freedom                               105
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.949
  Tucker-Lewis Index (TLI)                       0.933

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -4394.212
  Loglikelihood unrestricted model (H1)      -4290.643
                                                      
  Akaike (AIC)                                8868.423
  Bayesian (BIC)                              9014.241
  Sample-size adjusted Bayesian (SABIC)       8887.401

Root Mean Square Error of Approximation:

  RMSEA                                          0.075
  90 Percent confidence interval - lower         0.062
  90 Percent confidence interval - upper         0.088
  P-value H_0: RMSEA <= 0.050                    0.001
  P-value H_0: RMSEA >= 0.080                    0.263

Standardized Root Mean Square Residual:

  SRMR                                           0.056

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents =~                                                        
    sup_parents_p1    1.000                               0.938    0.876
    sup_parents_p2    1.034    0.056   18.570    0.000    0.970    0.888
    sup_parents_p3    0.988    0.059   16.637    0.000    0.927    0.811
  SUP_Friends =~                                                        
    sup_friends_p1    1.000                               1.021    0.906
    sup_friends_p2    0.793    0.043   18.466    0.000    0.809    0.857
    sup_friends_p3    0.891    0.050   17.735    0.000    0.910    0.831
  SE_Academic =~                                                        
    se_acad_p1        1.000                               0.697    0.880
    se_acad_p2        0.806    0.049   16.278    0.000    0.561    0.819
    se_acad_p3        0.952    0.058   16.491    0.000    0.663    0.828
  SE_Social =~                                                          
    se_social_p1      1.000                               0.640    0.846
    se_social_p2      0.959    0.055   17.338    0.000    0.614    0.881
    se_social_p3      0.926    0.066   13.956    0.000    0.593    0.742
  LS =~                                                                 
    ls_p1             1.000                               0.677    0.729
    ls_p2             0.820    0.078   10.488    0.000    0.555    0.762
    ls_p3             0.744    0.109    6.809    0.000    0.504    0.459

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents ~~                                                        
    SUP_Friends       0.134    0.064    2.094    0.036    0.140    0.140
    SE_Academic       0.219    0.046    4.717    0.000    0.335    0.335
    SE_Social         0.284    0.046    6.239    0.000    0.473    0.473
    LS                0.360    0.056    6.455    0.000    0.567    0.567
  SUP_Friends ~~                                                        
    SE_Academic       0.071    0.047    1.493    0.135    0.100    0.100
    SE_Social         0.195    0.046    4.262    0.000    0.299    0.299
    LS                0.184    0.053    3.500    0.000    0.267    0.267
  SE_Academic ~~                                                        
    SE_Social         0.274    0.036    7.525    0.000    0.613    0.613
    LS                0.212    0.039    5.407    0.000    0.449    0.449
  SE_Social ~~                                                          
    LS                0.330    0.043    7.650    0.000    0.761    0.761

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .sup_parents_p1    0.268    0.037    7.143    0.000    0.268    0.233
   .sup_parents_p2    0.253    0.038    6.595    0.000    0.253    0.212
   .sup_parents_p3    0.446    0.048    9.262    0.000    0.446    0.342
   .sup_friends_p1    0.228    0.040    5.663    0.000    0.228    0.179
   .sup_friends_p2    0.237    0.030    7.926    0.000    0.237    0.266
   .sup_friends_p3    0.371    0.042    8.811    0.000    0.371    0.310
   .se_acad_p1        0.141    0.022    6.487    0.000    0.141    0.226
   .se_acad_p2        0.154    0.018    8.648    0.000    0.154    0.329
   .se_acad_p3        0.202    0.024    8.397    0.000    0.202    0.315
   .se_social_p1      0.163    0.020    8.095    0.000    0.163    0.284
   .se_social_p2      0.109    0.016    6.782    0.000    0.109    0.224
   .se_social_p3      0.287    0.028   10.141    0.000    0.287    0.450
   .ls_p1             0.404    0.048    8.422    0.000    0.404    0.469
   .ls_p2             0.223    0.029    7.624    0.000    0.223    0.420
   .ls_p3             0.951    0.085   11.123    0.000    0.951    0.789
    SUP_Parents       0.880    0.098    8.937    0.000    1.000    1.000
    SUP_Friends       1.042    0.111    9.405    0.000    1.000    1.000
    SE_Academic       0.485    0.054    8.908    0.000    1.000    1.000
    SE_Social         0.410    0.048    8.459    0.000    1.000    1.000
    LS                0.458    0.072    6.323    0.000    1.000    1.000
# Compare the observed covariances against the covariances implied by
# the fitted CFA model.
g1 <- plot_heatmap(cov(df[, drivers]))

g2 <- plot_heatmap(
  data.frame(fitted(fit_mod)$cov),
  title = "Model Implied Covariances",
  subtitle = "Fitted Values"
)

plot <- ggarrange(g1, g2, ncol = 1, nrow = 2)

Modification Indices

# Largest modification indices: candidate extra parameters ("=~"
# cross-loadings, "~~" residual covariances) whose freeing would most
# reduce the model chi-square; top 6 with MI > 5 shown.
modindices(fit_mod, sort = TRUE, maximum.number = 6, standardized = FALSE, minimum.value = 5)
               lhs op            rhs     mi    epc
152 sup_friends_p1 ~~   se_social_p3 19.245  0.095
68     SUP_Friends =~          ls_p2 17.491  0.181
64     SUP_Friends =~   se_social_p1 17.210 -0.136
60     SUP_Friends =~ sup_parents_p3 16.063 -0.187
210          ls_p2 ~~          ls_p3 13.931 -0.140
57     SUP_Parents =~          ls_p3 13.057  0.329

Summary Global Fit Measures

# Global fit indices for both measurement models, side by side.
# Defining the measure names once (instead of repeating the literal
# vector per column) guarantees both columns report the same statistics
# in the same order.
fit_measure_names <- c("chisq", "baseline.chisq", "cfi", "aic", "bic", "rmsea", "srmr")

summary_df <- cbind(fitMeasures(fit_mod, fit_measure_names),
                    fitMeasures(fit_mod_1, fit_measure_names))
colnames(summary_df) <- c('Full Model', 'Reduced Model')

summary_df
                  Full Model Reduced Model
chisq           207.13746909  225.18095274
baseline.chisq 2586.65112811 2586.65112811
cfi               0.94876900    0.94230416
aic            8868.42313715 8882.46662079
bic            9014.24101305 9020.99360290
rmsea             0.07493739    0.07854933
srmr              0.05595908    0.05904842
# Path diagram of the full model labelled with raw ('est') estimates...
semPlot::semPaths(fit_mod, whatLabels = 'est', intercepts = FALSE)

# ...and of the constrained model with standardized ('std') estimates.
semPlot::semPaths(fit_mod_1, whatLabels = 'std', intercepts = FALSE)

Comparing the empirical and implied variance-covariance matrix

# Standardised residual covariances (sample minus model-implied) for
# both measurement models. Reuses the plot_heatmap() helper defined
# earlier instead of repeating the melt/ggplot boilerplate twice — the
# helper performs exactly the same pipeline (melt, rename, round 2dp,
# tile + labels, diverging gradient, angled x labels, title/subtitle).
g1 <- plot_heatmap(
  data.frame(resid(fit_mod, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit: Full Model"
)

g2 <- plot_heatmap(
  data.frame(resid(fit_mod_1, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit: Reduced Model"
)

plot <- ggarrange(g1, g2, ncol = 1, nrow = 2)

# Chi-square test of the full measurement model against the saturated model.
anova(fit_mod)
Chi-Squared Test Statistic (unscaled)

          Df    AIC    BIC  Chisq Chisq diff Df diff         Pr(>Chisq)    
Saturated  0                 0.00                                          
Model     80 8868.4 9014.2 207.14     207.14      80 0.0000000000003209 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Chi-square test of the constrained (equal friend-loadings) model
# against the saturated model.
anova(fit_mod_1)
Chi-Squared Test Statistic (unscaled)

          Df    AIC  BIC  Chisq Chisq diff Df diff           Pr(>Chisq)    
Saturated  0               0.00                                            
Model     82 8882.5 9021 225.18     225.18      82 0.000000000000002776 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio (chi-square difference) test between the two nested
# measurement models.
anova(fit_mod, fit_mod_1)

Chi-Squared Difference Test

          Df    AIC    BIC  Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
fit_mod   80 8868.4 9014.2 207.14                                          
fit_mod_1 82 8882.5 9021.0 225.18     18.044 0.16836       2  0.0001208 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Structural Equation Models

# Full SEM: the same five-factor measurement model plus a structural
# regression of life satisfaction (LS) on the four driver factors, and
# a freed covariance between the two self-esteem factors.
model <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3

# Structural model 
# Regressions
LS ~ SE_Academic + SE_Social + SUP_Parents + SUP_Friends

# Residual covariances
SE_Academic ~~ SE_Social
"

# Fit the structural model and visualise its parameter estimates.
fit_mod_sem <- sem(model, data = df)
modelplot(fit_mod_sem)

# Path diagram with standardized estimates, intercepts suppressed.
semPlot::semPaths(fit_mod_sem, whatLabels = 'std', intercepts = FALSE)

# Standardised residual covariances for the structural model. Reuses
# the plot_heatmap() helper defined earlier (identical melt/round/tile
# pipeline) rather than repeating the boilerplate a third time; the
# returned ggplot auto-prints at the top level, as before.
plot_heatmap(
  data.frame(resid(fit_mod_sem, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit"
)

```

Citation

BibTeX citation:
@online{forde,
  author = {Nathaniel Forde},
  title = {Confirmatory {Factor} {Analysis} and {Structural} {Equation}
    {Models}},
  langid = {en}
}
For attribution, please cite this work as:
Nathaniel Forde. n.d. “Confirmatory Factor Analysis and Structural Equation Models.”